#----Main Work R Script----

# This is the main R work for the main project
# It contains data gathering, cleaning and wrapping
# It also contains main statistical work with GMM and its alternatives

#----Data Work----

# Part I: Uploading Datasets and filtering them by Year and Country Codes

# ISO codes for SEA

ISO_SEA<-c("BRN","KHM","IDN","LAO","MYS","MMR","PHL","SGP","THA","TLS","VNM")
ISO_ccode<-COW_country_codes%>%filter(StateNme%in%SEA_Full2)
ISOSEAnum<-countrycode::countrycode(ISO_SEA,origin = "iso3c",destination = "cown")

# SEA Full Names

SEA_Full<-c("Brunei","Cambodia","Indonesia","Laos","Malaysia","Burma","Philippines","Singapore","Thailand", "Timor-Leste","Vietnam" )
SEA_Full2<-c("Brunei","Cambodia","Indonesia","Laos","Malaysia","Myanmar","Philippines","Singapore","Thailand", "Timor-Leste","Vietnam")

# Em-DAT Data Set: Main Source of DV

library(skimr)

Emdat_data<-public_emdat_custom_request_2025_10_27_f7980be1_ea18_4e4e_bbca_b13020990f7b
skim(Emdat_data)
summary(Emdat_data)

# Filtering EM-DAT Data for SEA 

library(dplyr)
EMdat_SEA <- Emdat_data %>%
  filter(ISO %in% ISO_SEA) %>%
  filter(`Disaster Subgroup` != "Biological") 
stopifnot(!any(EMdat_SEA$`Disaster Subgroup` == "Biological", na.rm = TRUE))


# Using EM-DAT to create event speed: Rapid vs. Slow


table(EMdat_SEA$`Disaster Type`)
rapidlist<-c("Earthquake","Flood","Mass movement (dry)","Mass movement (wet)","Storm","Volcanic activity","Wildfire","Extreme temperature")
EMdat_SEA$RapidDis<-ifelse(EMdat_SEA$`Disaster Type`%in%rapidlist,1,0)
table(EMdat_SEA$RapidDis)


# Filtering VDEM variables of Interest

V_DEM_lim<-read.csv("V-Dem-CY-Full+Others-v15.csv")%>%filter(year>=1989&year<=2022)
V_DEM_lim<-subset(V_DEM_lim,select = c("year","country_name","country_id","country_text_id","v2x_polyarchy","v2stcritrecadm","v2strenadm","v2clrspct"))
V_DEM_SEA<-V_DEM_lim%>%filter(country_text_id%in%ISO_SEA)
colnames(V_DEM_SEA)
colnames(V_DEM_SEA)[4]<-"iso3c"
colnames(brunei_components)

# Bringing back Brunei

V_DEM_SEA<-V_DEM_SEA%>%select(year,iso3c,v2x_polyarchy,v2stcritrecadm,v2strenadm,v2clrspct)
V_DEM_SEA<-bind_rows(V_DEM_SEA,brunei_components)

# First Step: Putting EM-DAT and V-DEM together

library(janitor)
library(tidyverse)

colnames(EMdat_SEA)
summary(EMdat_SEA)
EMdat_SEA<-EMdat_SEA%>%select(DisNo.,`Disaster Group`,`Disaster Subgroup`,`Disaster Type`,`Disaster Subtype`,ISO,
                              Subregion,Region,`OFDA/BHA Response`,Appeal,Declaration,`Start Year`,`Total Deaths`,`Total Affected`,
                              `Total Damage ('000 US$)`,RapidDis)

EMdat_SEA<-clean_names(EMdat_SEA) # removing all ', ", and making names of columns compatible
colnames(EMdat_SEA)
str(EMdat_SEA$rapid_dis)

# Transforming EMdat_SEA to country per year data

EMdat_SEA_country_year <- EMdat_SEA %>%
  group_by(iso, start_year) %>%
  summarise(
    total_deaths   = sum(total_deaths, na.rm = TRUE),
    total_affected = sum(total_affected, na.rm = TRUE),
    total_damage   = sum(total_damage_000_us, na.rm = TRUE),
    
    # region info (usually constant per country)
    subregion = dplyr::first(subregion),
    region    = dplyr::first(region),
    
    # how many events in that country-year
    n_events  = dplyr::n(),
    
    # examples of combining other fields
    disno              = paste(unique(dis_no), collapse = "; "),
    disaster_group     = paste(unique(disaster_group), collapse = "; "),
    disaster_subgroup  = paste(unique(disaster_subgroup), collapse = "; "),
    disaster_type      = paste(unique(disaster_type), collapse = "; "),
    disaster_subtype   = paste(unique(disaster_subtype), collapse = "; "),
    
    # flags across the year (NOTE: now using a logical expression)
    any_ofda_bha = any(ofda_bha_response == "Yes", na.rm = TRUE),
    any_appeal   = any(appeal == "Yes", na.rm = TRUE),
    any_decl     = any(declaration == "Yes", na.rm = TRUE),
    any_rapiddis = any(rapid_dis==1, na.rm = TRUE),
    
    .groups = "drop"
  )
colnames(EMdat_SEA_country_year)
colnames(EMdat_SEA_country_year)[1]<-"iso3c"
colnames(EMdat_SEA_country_year)[2]<-"year"

# Creating the main data set: Disaster Data SEA

disaster_data_SEA<-full_join(V_DEM_SEA,EMdat_SEA_country_year,by=c("iso3c","year"))
str(disaster_data_SEA)

disaster_data_SEA <- disaster_data_SEA %>%
  mutate(
    # -----------------------
    # total_deaths corrections
    # -----------------------
    total_deaths = case_when(
      # Lao PDR
      iso3c == "LAO" & year == 1997 ~ 0,
      iso3c == "LAO" & year == 1998 ~ 0,
      
      # Malaysia
      iso3c == "MYS" & year == 2010 ~ 4,
      iso3c == "MYS" & year == 2012 ~ 2,
      
      # Thailand
      iso3c == "THA" & year == 2018 ~ 83,
      
      # Timor-Leste
      iso3c == "TLS" & year == 1995 ~ 11,
      
      # Countries where all NAs should be 0
      iso3c %in% c("BRN", "KHM", "MMR", "SGP") & is.na(total_deaths) ~ 0,
      
      # otherwise keep original value
      TRUE ~ total_deaths
    ),
    
    # --------------------------
    # total_affected corrections
    # --------------------------
    total_affected = case_when(
      # Lao PDR
      iso3c == "LAO" & year == 1997 ~ 607,
      iso3c == "LAO" & year == 1998 ~ 0,
      
      # Malaysia
      iso3c == "MYS" & year == 2010 ~ 50000,
      iso3c == "MYS" & year == 2012 ~ 13746,
      
      # Thailand
      iso3c== "THA" & year == 2018 ~ 1212371,
      
      # Timor-Leste
      iso3c == "TLS" &year == 1995 ~ 19,
      
      # Countries where all NAs should be 0
      iso3c %in% c("BRN", "KHM", "MMR", "SGP") & is.na(total_affected) ~ 0,
      
      TRUE ~ total_affected
    ),
    
    # ------------------------
    # total_damage corrections
    # (already in '000 US$)
    # ------------------------
    total_damage = case_when(
      # Lao PDR
      # 15.832.000 US$ -> 15832 in thousand US$
      iso3c == "LAO" & year == 1997 ~ 15832,
      # 24.687.000 US$ -> 24687
      iso3c == "LAO" & year == 1998 ~ 24687,
      
      # Malaysia
      # 8.086.000 US$ -> 8086
      iso3c == "MYS" & year == 2010 ~ 8086,
      # 2012: no reliable damage info -> leave as is (NA or EM-DAT value)
      
      # Countries where all NAs should be 0
      iso3c %in% c("BRN", "KHM", "MMR", "SGP") & is.na(total_damage) ~ 0,
      
      TRUE ~ total_damage
    )
  )

# Replacing NAs with Non-Applicable after checking all other sources to determine whether it is truly missing or not

disaster_data_SEA<-disaster_data_SEA %>%   # <- put your joined data name here
  mutate(
    # Two special columns (static)
    subregion=replace_na(subregion,"South-eastern Asia"),
    region=replace_na(region,"Asia"),
    # all numeric: NA -> 0
    across(
      where(is.numeric),
      ~ replace_na(., 0)
    ),
    # all character: NA -> "No-Applicable"
    across(
      where(is.character),
      ~ replace_na(., "No-Applicable")
    ),
    across(
      where(is.logical),
      ~replace_na(.,FALSE)
    )
  )
str(disaster_data_SEA)
skimr::skim(disaster_data_SEA)

# Filtering Economic Freedom Index

library(readxl)

economic_fredomdat<-read_excel("efotw-2024-master-index-data-for-researchers-iso.xlsx", col_names = T,skip = 4)
economic_fredomdat_SEA<-economic_fredomdat%>%filter(`ISO Code 3`%in%ISO_SEA)
economic_fredomdat_SEA<-economic_fredomdat_SEA%>%filter(Year>=1989&Year<=2022)
colnames(economic_fredomdat_SEA)

# Historical Variable I : Colonization History
library(skimr)
SEA_colony<-european_overseas_colonies_and_their_last_colonizer%>%filter(Code%in%ISO_SEA)
skim(SEA_colony)
SEA_colony$BritishMandate<-ifelse(SEA_colony$`Last European colonial power (grouped)`=="United Kingdom",1,0)
SEA_colony$FrenchMandate<-ifelse(SEA_colony$`Last European colonial power (grouped)`=="France",1,0)
SEA_colony<-SEA_colony%>%filter(Year>=1989&Year<=2022)
SEA_colony$NeverColonized<-ifelse(SEA_colony$`Last European colonial power (grouped)`=="zzzz. Never colonized",1,0)
SEA_colony$DutchMandate<-ifelse(SEA_colony$`Last European colonial power (grouped)`=="Netherlands",1,0)
SEA_colony$PortugeseMandt<-ifelse(SEA_colony$`Last European colonial power (grouped)`=="Portugal",1,0)
colnames(SEA_colony)
View(SEA_colony)
colnames(SEA_colony)[2]<-"iso3c"
colnames(SEA_colony)[3]<-"year"

# Putting into the main data set: Disaster Data SEA

disaster_data_SEA<-disaster_data_SEA%>%
  full_join(SEA_colony%>%select(iso3c,year,BritishMandate,FrenchMandate,DutchMandate,PortugeseMandt,NeverColonized),by=c("iso3c","year"))
summary(disaster_data_SEA)
skimr::skim(disaster_data_SEA)

# Economic Complexity Datasets

library(tidyr)
ECI_Trade<-read.csv("Data-ECI-Trade.csv",sep = ",")

ECI_Tradelong<-ECI_Trade%>%pivot_longer(
    cols = matches("^[xX]\\d{4}$"),
    names_to = "year",
    values_to = "value",
    names_prefix = "[xX]",                  # remove leading x or X
    names_transform = list(year = as.integer)
  )
ECI_TradeSEA<-ECI_Tradelong%>%filter(Country%in%SEA_Full)

ECI_Tech<-read.csv("Data-ECI-Technology.csv",sep = ",")
ECI_Techlong<-ECI_Tech%>%pivot_longer(
  cols = matches("^[xX]\\d{4}$"),
  names_to = "year",
  values_to = "value",
  names_prefix = "[xX]",                  # remove leading x or X
  names_transform = list(year = as.integer)
)
ECI_TechSEA<-ECI_Techlong%>%filter(Country%in%SEA_Full)
View(ECI_TradeSEA)

# Tax Revenue as % GDP

tax_revenue<-read.csv("tax-revenues-as-a-share-of-gdp-unu-wider.csv",sep = ",")
tax_revenue<-tax_revenue%>%filter(Year>=1989&Year<=2022)
tax_revenueSEA<-tax_revenue%>%filter(Code%in%ISO_SEA)
colnames(tax_revenueSEA)[4]<-"TaxRev%GDP"
tax_revenueSEA<-tax_revenueSEA[,-5]
View(tax_revenueSEA)
colnames(tax_revenueSEA)
colnames(tax_revenueSEA)[2]<-"iso3c"
colnames(tax_revenueSEA)[3]<-"year"
colnames(tax_revenueSEA)[4]<-"TaxRevGDPratio"
summary(tax_revenueSEA)

# Putting into the main data set: Disaster Data SEA

disaster_data_SEA<-disaster_data_SEA%>%
  full_join(tax_revenueSEA%>%select(iso3c,year,TaxRevGDPratio),by=c("iso3c","year"))
summary(disaster_data_SEA)
# World Bank Data Sets

library(WDI)
library(dplyr)
library(janitor)

search_list<-c("GDP","HDI","PPP","FDI","gross capital","Pop","Remittance")
WDI_selection<-WDIsearch(string ="population" ,field = "name",short = T,cache = NULL)
GDPofSEA<-WDI(country = ISO_SEA,indicator = "NY.GDP.MKTP.CD",start = 1989,end = 2022)
GDPpercapSEA<-WDI(country = ISO_SEA,indicator = "NY.GDP.PCAP.PP.CD",start = 1989,end = 2022)
FDISEA<-WDI(country = ISO_SEA,indicator = "BX.KLT.DINV.WD.GD.ZS",start = 1989,end = 2022)
PPPEnergySEA<-WDI(country = ISO_SEA,indicator = "IE.PPN.ENGY.CD",start = 1989,end = 2022)
PPPICTSEA<-WDI(country = ISO_SEA,indicator = "IE.PPN.ICTI.CD",start = 1989,end = 2022)
GCPSEA<-WDI(country = ISO_SEA,indicator = "NE.GDI.TOTL.ZS",start = 1989,end = 2022)
TotalpopSEA<-WDI(country = ISO_SEA,indicator = "SP.POP.TOTL",start = 1989,end = 2022)
TotalRemittanceSEA<-WDI(country = ISO_SEA,indicator = "BX.TRF.PWKR.DT.GD.ZS",start = 1989,end = 2022)

# Selected World Bank Data sets

GDPofSEA<-clean_names(GDPofSEA)
GDPpercapSEA<-clean_names(GDPpercapSEA)
FDISEA<-clean_names(FDISEA)
TotalpopSEA<-clean_names(TotalpopSEA)
GCPSEA<-clean_names(GCPSEA)
colnames(GDPofSEA)
colnames(GDPofSEA)[5]<-"GDP"
colnames(GDPpercapSEA)
colnames(GDPpercapSEA)[5]<-"gdp_pc_ppp"
colnames(FDISEA)
colnames(FDISEA)[5]<-"FDI"
colnames(TotalpopSEA)
colnames(TotalpopSEA)[5]<-"TotalPop"
colnames(GCPSEA)
colnames(GCPSEA)[5]<-"GCP"

# Knitting together of selected World Bank Data Sets

worldbankdataSEA<-GDPofSEA%>%select(iso3c,year,GDP)%>%
  full_join(FDISEA%>%select(iso3c,year,FDI),by=c("iso3c","year"))%>%
  full_join(GDPpercapSEA%>%select(iso3c,year,gdp_pc_ppp),by=c("iso3c","year"))%>%
  full_join(TotalpopSEA%>%select(iso3c,year,TotalPop),by=c("iso3c","year"))%>%
  full_join(GCPSEA%>%select(iso3c,year,GCP),by=c("iso3c","year"))%>%
  mutate(
    logGDP=log(GDP)
  )%>%
  mutate(ln_gdp_pc_ppp=log(gdp_pc_ppp))
summary(worldbankdataSEA)

# Knitting World Bank Data to general data frame

disaster_data_SEA<-disaster_data_SEA%>%
  full_join(worldbankdataSEA%>%select(iso3c,year,GDP,FDI,gdp_pc_ppp,TotalPop,GCP,
                                      logGDP,ln_gdp_pc_ppp),by=c("iso3c","year"))
summary(disaster_data_SEA)

# UN DATA

popgrowth<-read.csv("population-growth-rates.csv",sep = ",")
popgrowth<-popgrowth%>%filter(Year>=1989&Year<=2022)
popgrowthSEA<-popgrowth%>%filter(Code%in%ISO_SEA)

shareurban<-read.csv("share-of-population-urban.csv",sep = ",")
shareurban<-shareurban%>%filter(Year>=1989&Year<=2022)
shareurbanSEA<-shareurban%>%filter(Code%in%ISO_SEA)

# Cleaning and preparing UN Data

library(tidyverse)

colnames(popgrowthSEA)
popgrowthSEA<-clean_names(popgrowthSEA)
colnames(popgrowthSEA)[2]<-"iso3c"
colnames(popgrowthSEA)[4]<-"pop_growth_rate_all_sex"
popgrowthSEA<-popgrowthSEA[,-5]
colnames(shareurbanSEA)
summary(popgrowthSEA)
shareurbanSEA<-clean_names(shareurbanSEA)
colnames(shareurbanSEA)[2]<-"iso3c"
UNdataSEA<-popgrowthSEA%>%full_join(shareurbanSEA%>%select(iso3c,year,urban_population_of_total_population),by=c("iso3c","year"))
colnames(UNdataSEA)
summary(UNdataSEA)
skimr::skim(UNdataSEA)

# Knitting UN Data to general data frame

disaster_data_SEA<-disaster_data_SEA%>%
  full_join(UNdataSEA%>%select(iso3c,year,pop_growth_rate_all_sex,urban_population_of_total_population),
            by=c("iso3c","year"))
summary(disaster_data_SEA)

# Geospatial data sets

terrainrug<-read.csv("terrain-ruggedness-index.csv",sep = ",")
terrainrugSEA<-terrainrug%>%filter(Code%in%ISO_SEA)
colnames(terrainrugSEA)
terrainrugSEA<-clean_names(terrainrugSEA)
colnames(terrainrugSEA)[2]<-"iso3c"
colnames(terrainrugSEA)[4]<-"terrain_ruggedness"
summary(terrainrugSEA)

rurallandsbelow5m<-read.csv("rural-land-below-5-meters.csv",sep = ",")
rurallandsbelow5mSEA<-rurallandsbelow5m%>%filter(Code%in%ISO_SEA)
colnames(rurallandsbelow5mSEA)
rurallandsbelow5mSEA<-clean_names(rurallandsbelow5mSEA)
colnames(rurallandsbelow5mSEA)[2]<-"iso3c"
colnames(rurallandsbelow5mSEA)[4]<-"ruralarea_below5m_of_totalarea"

# Knitting geospatial data sets to general data frame

disaster_data_SEA2<-disaster_data_SEA%>%
  left_join(terrainrugSEA%>%select(iso3c,terrain_ruggedness),by=c("iso3c"))
disaster_data_SEA<-disaster_data_SEA%>%
  left_join(terrainrugSEA%>%select(iso3c,terrain_ruggedness),by=c("iso3c"))
summary(disaster_data_SEA2)
summary(disaster_data_SEA)

# Working with Global Sea Levels

library(lubridate)
library(anytime)
library(dplyr)
library(tidyr)

globalsealevels<-read.csv("sea-level.csv",sep = ",")

globalsealevels$Day<-anydate(globalsealevels$Day)
class(globalsealevels$Day)
globalsealevels$Day<-format(as.Date(globalsealevels$Day,'%Y-%m-%d'),"%d-%m-%Y")
globalsealevels<-globalsealevels%>%mutate(year=year(ymd(globalsealevels$Day)))
annual_sumsealevels<-globalsealevels%>%group_by(year)%>%
  summarise(across(where(is.numeric),~mean(.x,na.rm = T)),.groups = "drop")
class(annual_sumsealevels)
class(annual_sumsealevels$year)

# Calculating Bartik LECZ * Annual Sea Levels ( Hazard Sea Levels)

# Demeaning annual data

annual_averagesea<-subset(annual_sumsealevels,select = c(1,4))
annual_averagesea<-annual_averagesea%>%mutate(annual_averageseafull=annual_averagesea$Global.sea.level.as.an.average.of.Church.and.White..2011..and.UHSLC.data-
                                                mean(annual_averagesea$Global.sea.level.as.an.average.of.Church.and.White..2011..and.UHSLC.data,na.rm = T))
annual_averagesea<-annual_averagesea%>%filter(year>=1989&year<=2022)

# Building a country-year panel

countriesLECz<-unique(rurallandsbelow5mSEA$Entity)
est_years <- 1989:2022    # or c(1990, 2000, 2015) if you insist on 3 years
panel_datasealevel <- expand_grid(iso3c = countriesLECz, year = est_years)

## Fix exposure at 1990 (preferred baseline)

lecz_base <- rurallandsbelow5mSEA %>%
  filter(Year == 1990) %>%
  transmute(Entity, lecz_base = Rural.land.area.where.elevation.is.below.5.meters....of.total.land.area.)  # make sure this is a 0–1 share
colnames(lecz_base)[1]<-"iso3c"

## Join baseline exposure and sea-level anomaly, then create the control

panel_datasealevel <- panel_datasealevel %>%
  left_join(lecz_base, by = "iso3c") %>%
  left_join(annual_averagesea %>% select(year, annual_averageseafull), by = "year") %>%
  mutate(H_SL = lecz_base * annual_averageseafull)
colnames(panel_datasealevel)
colnames(panel_datasealevel)[1]<-"country"
panel_datasealevel$iso3c<-countrycode::countrycode(panel_datasealevel$country,
                                                   origin = "country.name",destination = "iso3c",
                                                   custom_match = c("Indonesia"="IDN"))

# Knitting H_SL data back to general data frame

intersect(disaster_data_SEA$iso3c, panel_datasealevel$iso3c)
intersect(disaster_data_SEA$year, panel_datasealevel$year)

disaster_data_SEA2<-disaster_data_SEA%>%
  left_join(panel_datasealevel%>%select(iso3c,year,H_SL),by=c("year","iso3c"))
summary(disaster_data_SEA2)

disaster_data_SEA<-disaster_data_SEA%>%
  left_join(panel_datasealevel%>%select(iso3c,year,H_SL),by=c("iso3c","year"))
summary(disaster_data_SEA)

# Disaster related Climate Variables

library(lubridate)

annual_tempSEA<-read_csv("lao-outputs/annual-temperature-anomalies.csv")
annual_tempSEA<-annual_tempSEA%>%filter(Code%in%ISO_SEA)%>%filter(Year>=1989&Year<=2022)
precipitation_SEA<-read_csv("lao-outputs/global-precipitation-anomaly.csv")
precipitation_SEA<-precipitation_SEA%>%filter(Code%in%ISO_SEA)%>%filter(Year>=1989&Year<=2022)
global_co2<-read.csv("lao-outputs/global-co2-concentration.csv")
global_co2$Day<-as.POSIXct(as.character(global_co2$Day), format="%Y-%m-%d")
global_co2<-global_co2%>%filter(as.Date(Day) >= as.Date("1989-01-15"),as.Date(Day) <= as.Date("2022-12-15"))
global_co2 <- global_co2 %>%
  mutate(Year = as.integer(format(Day, "%Y"))) %>%
  group_by(Year) %>%
  summarise(
    co2_annual = mean(Monthly.concentration.of.atmospheric.carbon.dioxide,
                      na.rm = TRUE)
  )
forestshare_SEA<-read.csv("forest-area-as-share-of-land-area.csv")
forestshare_SEA<-forestshare_SEA%>%filter(Code%in%ISO_SEA)%>%filter(Year>=1989&Year<=2022)

colnames(annual_tempSEA)
colnames(annual_tempSEA)[1]<-"country"
colnames(annual_tempSEA)[2]<-"iso3c"
colnames(annual_tempSEA)[3]<-"year"
colnames(annual_tempSEA)[4]<-"temparature_anomaly"
view(precipitation_SEA)
colnames(precipitation_SEA)
colnames(precipitation_SEA)[1]<-"country"
colnames(precipitation_SEA)[2]<-"iso3c"
colnames(precipitation_SEA)[3]<-"year"
colnames(precipitation_SEA)[4]<-"annual_preciptation_anomaly"
colnames(forestshare_SEA)
forestshare_SEA<-forestshare_SEA[,-5]
colnames(forestshare_SEA)[1]<-"country"
colnames(forestshare_SEA)[2]<-"iso3c"
colnames(forestshare_SEA)[3]<-"year"
colnames(forestshare_SEA)[4]<-"Forestlandratio"
colnames(global_co2)

# Knitting Disaster related Climate Variables to general data frame

climate_disaster_data<-annual_tempSEA%>%select(iso3c,year,temparature_anomaly,iso3c,year)%>%
  left_join(precipitation_SEA%>%select(iso3c,year,annual_preciptation_anomaly),by=c("iso3c","year"))%>%
  left_join(forestshare_SEA%>%select(iso3c,year,Forestlandratio),by=c("iso3c","year"))%>%
  left_join(global_co2%>%select(year,co2_annual),by="year")

all_years_climate<-seq(min(climate_disaster_data$year),max(climate_disaster_data$year))

full_grid_climate <- expand_grid(
  iso3c = unique(climate_disaster_data$iso3c),
  year    = all_years_climate
)

missing_years_climate <- full_grid_climate %>%
  anti_join(climate_disaster_data %>% distinct(iso3c, year),
            by = c("iso3c", "year"))
missing_years_climate

disaster_data_SEA2<-disaster_data_SEA%>%
  full_join(climate_disaster_data,by=c("iso3c","year"))
summary(disaster_data_SEA2)

disaster_data_SEA<-disaster_data_SEA%>%
  full_join(climate_disaster_data,by=c("iso3c","year"))

# Knitting IBTracs to general data frame

colnames(ibtracs_SEA)

disaster_data_SEA2<-disaster_data_SEA2%>%
  left_join(ibtracs_SEA%>%select(year,iso3c,n_storms_close,n_strong_close),by=c("iso3c","year"))

disaster_data_SEA<-disaster_data_SEA%>%
  left_join(ibtracs_SEA%>%select(year,iso3c,n_storms_close,n_strong_close),by=c("iso3c","year"))

disaster_data_SEA2<-disaster_data_SEA2 %>%   # <- put your joined data name here
  mutate(
    # Two special columns (static)
    n_storms_close=replace_na(n_storms_close,0),
    n_strong_close=replace_na(n_strong_close,0)
  )
summary(disaster_data_SEA2)

disaster_data_SEA<-disaster_data_SEA %>%   # <- put your joined data name here
  mutate(
    # Two special columns (static)
    n_storms_close=replace_na(n_storms_close,0),
    n_strong_close=replace_na(n_strong_close,0)
  )
summary(disaster_data_SEA)

# Political Instrument Post-9/11 shock instrument (Bartik Style)

library(countrycode)
library(peacesciencer)
library(dplyr)
library(scales)
library(janitor)
library(tidyverse)

dfAgree$ccode1<-countrycode(dfAgree$ccode1,origin = "cown",destination = "iso3c")
dfAgree$ccode2<-countrycode(dfAgree$ccode2,origin = "cown",destination = "iso3c")
alignment_with_USA<-dfAgree%>%filter(ccode2%in%ISO_SEA&ccode1=="USA")%>%filter(year%in%est_years)
alignment_with_USA$Firstcountry<-countrycode(alignment_with_USA$ccode1,origin = "cown",destination = "iso3c")
alignment_with_USA$Secondcountry<-countrycode(alignment_with_USA$ccode2,origin = "cown",destination = "iso3c")
defenseaggrements<-cow_ddy%>%add_atop_alliance()
defenseaggrements<-defenseaggrements%>%filter(year%in%est_years)%>%filter(ccode2%in%ISOSEAnum&ccode1=="2")
defenseaggrements$Firstcountry<-countrycode(defenseaggrements$ccode1,origin="cown",destination = "iso3c")
defenseaggrements$Secondcountry<-countrycode(defenseaggrements$ccode2,origin = "cown",destination = "iso3c")
defenseaggrements<-subset(defenseaggrements,select = c(1,2,3,4,9,10))

# Build pre-2001 exposure weight w_i^(0)

pre_winest<-1989:2000; 
exp_pre<-alignment_with_USA%>%filter(year%in%pre_winest)%>%
  group_by(Secondcountry)%>%summarise(un_sim_pre=mean(IdealPointDistance),.groups = "drop")%>%
  left_join(defenseaggrements%>%filter(year%in%pre_winest)%>%
              group_by(Secondcountry)%>%
              summarise(def_pact_pre=as.numeric(any(atop_defense==1)),.groups="drop"),by="Secondcountry")%>%
  mutate(z1=as.numeric(scale(un_sim_pre)),z2=as.numeric(scale(def_pact_pre)),
         w_pre=rowMeans(cbind(replace_na(z1,0),replace_na(z2,0)),na.rm = T),w_pre=rescale(w_pre,to=c(0,1)))%>%select(Secondcountry,w_pre)

#  Global shock G_t

G_post<-tibble(year=est_years,G=as.numeric(year>=2001))

# Build the instrument Z_it = w_i^(0) * G_t

Post911_Inst<-expand_grid(Secondcountry=ISO_SEA,year=est_years)%>%
  left_join(exp_pre,by="Secondcountry")%>%
  left_join(G_post,by="year")%>%
  mutate(Z_post_911=w_pre*G)%>%
  select(Secondcountry,year,Z_post_911)
colnames(Post911_Inst)
colnames(Post911_Inst)[1]<-"iso3c"

# Knitting Post 9-11 Bartik to general data

intersect(disaster_data_SEA2$iso3c, Post911_Inst$iso3c)
intersect(disaster_data_SEA2$year, Post911_Inst$year)

disaster_data_SEA2<-disaster_data_SEA2%>%full_join(Post911_Inst%>%select(iso3c,year,Z_post_911),by=c("iso3c","year"))
summary(disaster_data_SEA2)
disaster_data_SEA<-disaster_data_SEA%>%full_join(Post911_Inst%>%select(iso3c,year,Z_post_911),by=c("iso3c","year"))

# ENSO Data (web scrapping)

library(htm2txt)

url1<-"https://www.cpc.ncep.noaa.gov/data/indices/soi"
ENSO_data<-gettxt(url1)
class(ENSO_data)

# Transforming from Vector to Data Frame

library(dplyr)
library(tidyr)
library(lubridate)
library(stringi)

#Normalize odd spaces to regular spaces

txt <- stri_replace_all_charclass(ENSO_data, "\\p{Z}", " ")

# Ensure each year starts a new line (htm2txt can collapse line breaks)

txt <- gsub("(?<!\\d)(\\d{4})\\s+", "\n\\1 ", txt, perl = TRUE)

# Split, trim, and keep lines that look like: YYYY + 12 numeric fields

lines <- stri_split_lines1(txt)%>% stri_trim_both()
lines <- lines[stri_detect_regex(lines, "^\\d{4}(\\s+-?\\d+(?:\\.\\d+)*){12}\\s*$")]

# Parse to wide table (year + 12 months)

tabular1 <- read.table(text = paste(lines, collapse = "\n"),
                  header = FALSE, fill = TRUE,
                  col.names = c("year", month.abb),
                  colClasses = c("integer", rep("numeric", 12)))

# Calculating Annual Means

tabular1$annualmean<-rowMeans(tabular1[,2:13],na.rm = T)

# Knitting ENSO to the general data frame

colnames(tabular1)
colnames(tabular1)[14]<-"ENSO_annualmean"
ENSO_mean<-tabular1%>%filter(year>=1989&year<=2022)
ENSO_mean<-ENSO_mean[35:68,]
disaster_data_SEA2<-disaster_data_SEA%>%left_join(ENSO_mean%>%select(year,ENSO_annualmean),by=c("year"))
disaster_data_SEA<-disaster_data_SEA%>%left_join(ENSO_mean%>%select(year,ENSO_annualmean),by=c("year"))

#----Collecting data for the Orthogonal Indexes/Variables----

# Demographics: Age Groups - Data Preparation

library(dplyr)
library(WDI)
popagegroup<-read.csv("population-by-age-group.csv",header = T,sep = ",")
popagegroupSEA<-popagegroup%>%filter(Code%in%ISO_SEA)%>%filter(Year>=1989&Year<=2022)
colnames(popagegroupSEA)[1]<-"country"
colnames(popagegroupSEA)[2]<-"iso3c"
colnames(popagegroupSEA)[3]<-"year"
popagegroupSEA$country<-as.character(popagegroupSEA$country)
popagegroupSEA$iso3c<-as.character(popagegroupSEA$iso3c)
popagegroupSEA<-left_join(popagegroupSEA,TotalpopSEA,by=c("iso3c","year"))
popagegroupSEA<-popagegroupSEA[,-c(9,10)]  
popagegroupSEA$u5<-(popagegroupSEA$Population...Sex..all...Age..0.4...Variant..estimates/popagegroupSEA$SP.POP.TOTL)*100
popagegroupSEA$u65<-(popagegroupSEA$Population...Sex..all...Age..65....Variant..estimates/popagegroupSEA$SP.POP.TOTL)*100  
popagegroupSEA$DVI_raw<-0.5*(popagegroupSEA$u5+popagegroupSEA$u65)
summary(popagegroupSEA)
colnames(popagegroupSEA)

# Knitting Age Groups to general data

ageGroupSEA<-subset(popagegroupSEA,select = c(iso3c,year,u5,u65,DVI_raw))
disaster_data_SEA2<-disaster_data_SEA2%>%full_join(ageGroupSEA%>%select(iso3c,year,u5,u65,DVI_raw),by=c("iso3c","year"))
summary(disaster_data_SEA2)
disaster_data_SEA<-disaster_data_SEA%>%full_join(ageGroupSEA%>%select(iso3c,year,u5,u65,DVI_raw),by=c("iso3c","year"))

# Hospital Beds

hospitalbeds10k<-read.csv("hospitalbeds(per 10000).csv")
hospitalbeds10k<-hospitalbeds10k[,c(2,7,10,30)]
colnames(hospitalbeds10k)[2]<-"country"
colnames(hospitalbeds10k)[3]<-"year"
colnames(hospitalbeds10k)[4]<-"TotalBedper10k"
hospitalbeds10kSEA<-hospitalbeds10k%>%filter(country%in%ISO_SEA)%>%filter(year>=1989&year<=2022)
hospitalbeds10kSEA$TotalBedper1k<-hospitalbeds10kSEA$TotalBedper10k/10

## Checking with direct download from WD-WHO

hospitalbeds1kSEA<-WDI(country = ISO_SEA,indicator = "SH.MED.BEDS.ZS",start = 1989,end = 2022)
colnames(hospitalbeds1kSEA)
colnames(hospitalbeds1kSEA)[5]<-"TotalBedper1k"
summary(hospitalbeds1kSEA)
# Knitting Hospital Bed data into the general data frame

disaster_data_SEA2<-disaster_data_SEA2%>%full_join(hospitalbeds1kSEA%>%select(iso3c,year,TotalBedper1k),by=c("iso3c","year"))
disaster_data_SEA<-disaster_data_SEA%>%full_join(hospitalbeds1kSEA%>%select(iso3c,year,TotalBedper1k),by=c("iso3c","year"))

# Information Reach Index

library(tidyr)
library(stringi)
library(readr)

internetSEA<-read.csv("Individuals using the Internet.csv",na.strings = c("", "NA", "n/a", "-"))
internetSEA<-pivot_longer(internetSEA,-c(Economy),values_to = "Value",names_to = "Year")
internetSEA$Year<-gsub( "X", "", as.character(internetSEA$Year))
internetSEA$Year<-as.numeric(internetSEA$Year)
internetSEA$Value<-gsub("%","",as.character(internetSEA$Value))
internetSEA$Value<-as.numeric(internetSEA$Value)
internetSEA<-internetSEA%>%mutate(value_fixed=formatC(Value,digits = 5,format = "f"))
internetSEA$value_fixed<-as.numeric(internetSEA$value_fixed)
internetSEA<-internetSEA[,-3]
internetSEA<-internetSEA%>%replace(is.na(.),0)
colnames(internetSEA)[1]<-"Country"
colnames(internetSEA)[2]<-"year"
colnames(internetSEA)[3]<-"Internetusers%"

mobileSEA<-read.csv("Mobile-cellular subscriptions.csv")
mobileSEA<-mobileSEA%>%mutate(across(matches("^X\\d{4}$"),
                           ~ readr::parse_number(as.character(.)))) %>%pivot_longer(
    cols = matches("^X\\d{4}$"),
    names_to = "year",
    names_prefix = "X",
    names_transform = list(year = as.integer),
    values_to = "value"
  )
mobileSEA<-mobileSEA%>%mutate(value_fixed=formatC(value,digits = 5,format = "f"))
mobileSEA<-mobileSEA[,-3]
colnames(mobileSEA)[1]<-"Country"
colnames(mobileSEA)[2]<-"year"
colnames(mobileSEA)[3]<-"Mobilesubsper100"
mobileSEA$Mobilesubsper100<-as.numeric(mobileSEA$Mobilesubsper100)

InformationResearchIndexSEA<-left_join(internetSEA,mobileSEA,by=c("Country","year"))
Z_score<-function(x)as.numeric(scale(x))
InformationResearchIndexSEA<-InformationResearchIndexSEA%>%mutate(Z_int=Z_score(`Internetusers%`),Z_mobile=Z_score(Mobilesubsper100))
InformationResearchIndexSEA<-InformationResearchIndexSEA%>%mutate(RII=0.5*(Z_int+Z_mobile))
InformationResearchIndexSEA$iso3c<-countrycode::countrycode(InformationResearchIndexSEA$Country,origin = "country.name.en",
                                                            destination = "iso3c",custom_match = c("Indonesia"="IDN"))
colnames(InformationResearchIndexSEA)
colnames(InformationResearchIndexSEA)[3]<-"Internetusersperct"

# Knitting Information Reseach Index data into the general data frame

disaster_data_SEA2<-disaster_data_SEA2%>%full_join(
  InformationResearchIndexSEA%>%select(year,Internetusersperct,Mobilesubsper100,Z_int,Z_mobile,RII,iso3c),by=c("iso3c","year")
)
summary(disaster_data_SEA2)

disaster_data_SEA<-disaster_data_SEA%>%full_join(
  InformationResearchIndexSEA%>%select(year,Internetusersperct,Mobilesubsper100,Z_int,Z_mobile,RII,iso3c),by=c("iso3c","year")
)
summary(disaster_data_SEA)
# Calculating frag_indexᵢ ∝ coastline length / land area

library(rnaturalearth)
library(rnaturalearthdata)
library(sf)
library(dplyr)

country_iso3 <- c("BRN","KHM","IDN","LAO","MYS","MMR","PHL","SGP","THA","TLS","VNM")

world <- rnaturalearth::ne_countries(
  scale = "medium",
  returnclass = "sf"
)

sea_sf <- world %>%
  filter(iso_a3 %in% country_iso3)

# area (km²)
sea_sf$area_km2 <- as.numeric(st_area(sea_sf)) / 1e6

# coastline length: cast to MULTILINESTRING, then length (km)
sea_lines <- st_cast(sea_sf, "MULTILINESTRING")
sea_lines$coast_km <- as.numeric(st_length(sea_lines)) / 1000

# merge coast_km back into main sf (same row order)
sea_sf$coast_km <- sea_lines$coast_km

# raw fragmentation proxy: coastline per km²
sea_sf$frag_raw <- sea_sf$coast_km / sea_sf$area_km2

# normalise to [0,1] across your 11
frag_min <- min(sea_sf$frag_raw, na.rm = TRUE)
frag_max <- max(sea_sf$frag_raw, na.rm = TRUE)
sea_sf$frag_index <- (sea_sf$frag_raw - frag_min) / (frag_max - frag_min)

# Extract a data frame to merge into your panel:

frag_df <- sea_sf %>% st_drop_geometry() %>% select(iso3 = iso_a3, frag_index)

# Knitting Frag Index into the general data frame

colnames(frag_df)
colnames(frag_df)[1]<-"iso3c"

disaster_data_SEA2<-disaster_data_SEA2%>%left_join(frag_df%>%select(iso3c,frag_index),by=c("iso3c"))
disaster_data_SEA<-disaster_data_SEA%>%left_join(frag_df%>%select(iso3c,frag_index),by=c("iso3c"))
summary(disaster_data_SEA2)
summary(disaster_data_SEA)

# Calculating GSCPI from monthly data to annual

library(dplyr)
library(lubridate)
library(readxl)

gscpi_monthly <- read_excel("gscpi_data.xls", 
                         sheet = "GSCPI Monthly Data")
gscpi_monthly<-gscpi_monthly[,c(1,2)]
gscpi_monthly<-gscpi_monthly[-c(1:4),]
class(gscpi_monthly$Date)
gscpi_monthly$Date<-dmy(as.character(gscpi_monthly$Date))
gscpi_year <- gscpi_monthly %>%
  mutate(year = year(Date)) %>%
  group_by(year) %>%
  summarise(freight_index = mean(GSCPI, na.rm = TRUE), .groups = "drop")

# Knitting Freight Index into the general data frame

colnames(gscpi_year)
disaster_data_SEA2<-disaster_data_SEA2%>%left_join(gscpi_year%>%select(year,freight_index),by=c("year"))
summary(disaster_data_SEA2)
disaster_data_SEA<-disaster_data_SEA%>%left_join(gscpi_year%>%select(year,freight_index),by=c("year"))

# Political Stability and Absence of Violence/Terrorism

library(dplyr)
library(WDI)
PSAV_SEA<-WDI(country = ISO_SEA,indicator = "PV.PER.RNK",start = 1989,end = 2022)
colnames(PSAV_SEA)[5]<-"PSAV"
colnames(PSAV_SEA)

# Knitting PSAV into the general data frame

disaster_data_SEA2<-disaster_data_SEA2%>%full_join(PSAV_SEA%>%select(iso3c,year,PSAV),by=c("iso3c","year"))
summary(disaster_data_SEA2)
disaster_data_SEA<-disaster_data_SEA%>%full_join(PSAV_SEA%>%select(iso3c,year,PSAV),by=c("iso3c","year"))

# Limiting the general data frame to the period between 1991 and 2021

disaster_data_SEA2_1991_2021<-disaster_data_SEA2%>%filter(year>=1991&year<=2021)
summary(disaster_data_SEA2_1991_2021)
disaster_data_SEA_1991_2021<-disaster_data_SEA%>%filter(year>=1991&year<=2021)

# Calculating BAI INDEX for SEA 11

library(tidyverse)
library(scales)
library(janitor)

disaster_data_SEA2_1991_2021<-disaster_data_SEA2_1991_2021%>%
  mutate(
    Z_strictSEA=Z_score2(v2stcritrecadm),
    Z_stenSEA=Z_score2(v2strenadm),
    Z_clrspSEA=Z_score2(v2clrspct),
    BAI_SEA=(Z_strictSEA+Z_stenSEA+Z_clrspSEA)/3
  )
summary(disaster_data_SEA2_1991_2021)

disaster_data_SEA_1991_2021<-disaster_data_SEA_1991_2021%>%
  mutate(
    Z_strictSEA=Z_score2(v2stcritrecadm),
    Z_stenSEA=Z_score2(v2strenadm),
    Z_clrspSEA=Z_score2(v2clrspct),
    BAI_SEA=(Z_strictSEA+Z_stenSEA+Z_clrspSEA)/3
  )
summary(disaster_data_SEA_1991_2021)

# Calculating DII for SEA 11

colnames(disaster_data_SEA2_1991_2021)

winsor_function1 <- function(x, p = 0.01) {
  ql <- quantile(x, p,    na.rm = TRUE)
  qu <- quantile(x, 1-p,  na.rm = TRUE)
  pmax(pmin(x, qu), ql)
}

disaster_data_SEA2_1991_2021<-disaster_data_SEA2_1991_2021%>%mutate(
  d_pc=1e5*(total_deaths/TotalPop),
  a_pc=1e5*(total_affected/TotalPop),
  dmg_gdp=100*((total_damage*1e3)/GDP)
)

disaster_data_SEA2_1991_2021<-disaster_data_SEA2_1991_2021%>%mutate(
  d_t=asinh(d_pc),
  a_t=asinh(a_pc),
  dmg_t=asinh(dmg_gdp)
)

disaster_data_SEA2_1991_2021<-disaster_data_SEA2_1991_2021%>%mutate(
  d_t_w=winsor_function1(d_t,p=0.01),
  a_t_w=winsor_function1(a_t,p = 0.01),
  dmg_t_w=winsor_function1(dmg_t,p=0.01),
  Z_d=Z_score2(d_t_w),
  Z_a=Z_score2(a_t_w),
  Z_dmg=Z_score2(dmg_t_w),
  DII=(Z_d+Z_a+Z_dmg)/3
)
summary(disaster_data_SEA2_1991_2021)

normalize01 <- function(x) {
  rng <- range(x, na.rm = TRUE)
  if (rng[1] == rng[2]) return(rep(0.5, length(x)))  # all values equal
  (x - rng[1]) / (rng[2] - rng[1])
}
library(dplyr)
disaster_data_SEA2_1991_2021 <- disaster_data_SEA2_1991_2021 %>% 
  mutate(
    DII_01 = normalize01(DII)
  )
disaster_data_SEA_1991_2021<-disaster_data_SEA_1991_2021%>%mutate(
  d_pc=1e5*(total_deaths/TotalPop),
  a_pc=1e5*(total_affected/TotalPop),
  dmg_gdp=100*((total_damage*1e3)/GDP)
)

disaster_data_SEA_1991_2021<-disaster_data_SEA_1991_2021%>%mutate(
  d_t=asinh(d_pc),
  a_t=asinh(a_pc),
  dmg_t=asinh(dmg_gdp)
)

disaster_data_SEA_1991_2021<-disaster_data_SEA_1991_2021%>%mutate(
  d_t_w=winsor_function1(d_t,p=0.01),
  a_t_w=winsor_function1(a_t,p = 0.01),
  dmg_t_w=winsor_function1(dmg_t,p=0.01),
  Z_d=Z_score2(d_t_w),
  Z_a=Z_score2(a_t_w),
  Z_dmg=Z_score2(dmg_t_w),
  DII=(Z_d+Z_a+Z_dmg)/3
)
summary(disaster_data_SEA2_1991_2021)
colnames(disaster_data_SEA2_1991_2021)
# This is too disaster driven DII. Thus we alternative we do the following

lm_d  <- lm(d_t_w   ~  ENSO_annualmean + terrain_ruggedness + rapid_year, data = disaster_data_SEA2_1991_2021,na.action = na.exclude)
lm_a  <- lm(a_t_w   ~ ENSO_annualmean + terrain_ruggedness + rapid_year, data = disaster_data_SEA2_1991_2021,na.action = na.exclude)
lm_dm <- lm(dmg_t_w ~ ENSO_annualmean + terrain_ruggedness + rapid_year, data = disaster_data_SEA2_1991_2021,na.action = na.exclude)

res_lm_d   <- residuals(lm_d)
res_lm_a   <- residuals(lm_a)
res_lm_dm <- residuals(lm_dm)

Z_res_lm_d<-Z_score2(res_lm_d)
Z_res_lm_a<-Z_score2(res_lm_a)
Z_res_lm_dm<-Z_score2(res_lm_dm)
disaster_data_SEA2_1991_2021_2<-disaster_data_SEA2_1991_2021
disaster_data_SEA2_1991_2021_2$DII_resid<-(Z_res_lm_a+Z_res_lm_d+Z_res_lm_dm)/3
disaster_data_SEA2_1991_2021$DII_resid<-(Z_res_lm_a+Z_res_lm_d+Z_res_lm_dm)/3

# The previous DII_resid was also too disaster driven. In other words, when we took out the variation explained by disaster related
# variables, the remaining was not enough to be explained by EDI or any other variable. That is why we develop another approach
# It is focusing on tail events (high disasters) but also having multiple ordinal categories

# Threshold approach
library(dplyr)
thresh <- quantile(disaster_data_SEA2_1991_2021$DII, 0.5, na.rm = TRUE)
disaster_data_SEA2_1991_2021 <- disaster_data_SEA2_1991_2021 %>%
  mutate(
    high_DII = as.integer(DII_cat=="high")
  )

summary(disaster_data_SEA2_1991_2021)
table(disaster_data_SEA2_1991_2021$high_DII)
# Ordinal approach

disaster_data_SEA2_1991_2021$DII_cat <- cut(
  disaster_data_SEA2_1991_2021$DII,
  breaks = c(-Inf, -0.5, 0.5, Inf),
  labels = c("low", "medium", "high"),
  right = TRUE,
  ordered_result = TRUE
)
table(disaster_data_SEA2_1991_2021$DII_cat)
# Changing the v2x_polyarchy to EDI

colnames(disaster_data_SEA2_1991_2021)[3]<-"EDI_SEA"
colnames(disaster_data_SEA_1991_2021)[3]<-"EDI_SEA"

#----Creating Demographic Vulnerability Index (DVI_{t-1}), Health Surge Capacity (HSC_{t-1}), Information Reach Index (IRI_{t-1})----
#Conflict Intensity (CI_{t-1}), Institutional Stability (GV_{t}), Neighbour Spillover (NS_{t}), Logistics Friction Shock (LFS_{it})
# Creates: DVI_raw, DVI_resid, DVI_t1 (z-scored)
# Panel keys required: iso3, year
# Inputs required: u5 (share of total pop ages 0–4), o65 (share ages 65+),
#                  ln_gdppc, urban_share, EDI, BAI
# Notes:
#  • u5/o65 should be proportions in [0,1]. If they are percentages [0,100],
#    set `shares_in_0_100 <- TRUE` below to auto-convert.
#  • Residualizes on {ln_gdppc, urban_share, EDI, BAI} + country & year FE.
#  • Lags within country by 1 year, then z-scores across the panel.
#  • No external packages required.
# ============================================================

# ----SETTINGS ----
shares_in_0_100 <- TRUE  # change to TRUE if u5, o65 are in percentages
attach(disaster_data_SEA2_1991_2021)
# ---- INPUT CHECKS ----
required_cols <- c("iso3c","year","u5","u65","ln_gdp_pc_ppp","urban_population_of_total_population","EDI_SEA","BAI_SEA")
missing <- setdiff(required_cols, names(disaster_data_SEA2_1991_2021))
if (length(missing) > 0) {
  stop(sprintf("DVI block: missing required columns: %s", paste(missing, collapse=", ")))
}

# ---- 1) Normalize u5 / o65 to proportions if needed ----
if (shares_in_0_100) {
  disaster_data_SEA2_1991_2021$u5_n  <- disaster_data_SEA2_1991_2021$u5/ 100
  disaster_data_SEA2_1991_2021$u65_n <- disaster_data_SEA2_1991_2021$u65 / 100
}
# Soft guardrails for out-of-range values
if (any(disaster_data_SEA2_1991_2021$u5_n < 0 | disaster_data_SEA2_1991_2021$u5_n > 1, na.rm = TRUE))  warning("DVI block: 'u5' contains values outside [0,1].")
if (any(disaster_data_SEA2_1991_2021$u65_n < 0 | disaster_data_SEA2_1991_2021$u65_n > 1, na.rm = TRUE)) warning("DVI block: 'o65' contains values outside [0,1].")

# ---- 2) Construct DVI_raw (equal-weight composite) ----
disaster_data_SEA2_1991_2021$DVI_raw_n <- 0.5 * (disaster_data_SEA2_1991_2021$u5_n + disaster_data_SEA2_1991_2021$u65_n) 

# ---- 3) Residualize on macro + institutions + FE ----
# Use na.exclude so residuals realign with rows
dvi_lm <- lm(DVI_raw_n ~ ln_gdp_pc_ppp+urban_population_of_total_population + EDI_SEA + BAI_SEA + factor(iso3c) + factor(year),
             data = disaster_data_SEA2_1991_2021, na.action = na.exclude)
disaster_data_SEA2_1991_2021$DVI_resid <- residuals(dvi_lm)

# ---- 4) Lag within country by 1 year ----
# Within-country lag using split-apply
DVI_t1 <- numeric(nrow(disaster_data_SEA2_1991_2021))
DVI_t1[] <- NA_real_
split_idx <- split(seq_len(nrow(disaster_data_SEA2_1991_2021)), disaster_data_SEA2_1991_2021$iso3c)
for (idx in split_idx) {
  ord <- order(disaster_data_SEA2_1991_2021$year[idx], na.last = TRUE)
  i <- idx[ord]
  DVI_t1[i] <- c(NA, head(disaster_data_SEA2_1991_2021$DVI_resid[i], -1))
}
disaster_data_SEA2_1991_2021$DVI_t1 <- DVI_t1

# ---- 5) z-score across the panel (after lag) ----
z_DVI <- scale(disaster_data_SEA2_1991_2021$DVI_t1)
# scale() returns a matrix; coerce to numeric while preserving NAs
disaster_data_SEA2_1991_2021$DVI_t1 <- as.numeric(z_DVI)

# ---- 6) Quick diagnostics (optional; comment out if not needed) ----
cat("DVI block: non-missing DVI_t1 n =", sum(!is.na(disaster_data_SEA2_1991_2021$DVI_t1)), "\n")
cat("Correlations post-resid (pairwise complete obs):\n")
suppressWarnings(print(cor(df[,c("DVI_t1","EDI_SEA","BAI_SEA","ln_gdp_pc_ppp","urban_population_of_total_population")],
use = "pairwise.complete.obs")))

# End of DVI Block

# Creates: HSC_per1k, HSC_resid, HSC_t1 (z-scored)
# Panel keys required: iso3, year
# Inputs required: hospital_beds_per1k,
#                  ln_gdppc, urban_share, EDI, BAI
# Notes:
#  • Residualizes on {ln_gdppc, urban_share, EDI, BAI} + country & year FE.
#  • Lags within country by 1 year, then z-scores across the panel.
#  • No external packages required.
# ============================================================

required_cols <- c("iso3c","year","TotalBedper1k","ln_gdp_pc_ppp","urban_population_of_total_population","EDI_SEA","BAI_SEA")
missing <- setdiff(required_cols, names(disaster_data_SEA2_1991_2021))
if (length(missing) > 0) {
  stop(sprintf("HSC block: missing required columns: %s", paste(missing, collapse=", ")))
}

# ---- 1) Residualize on macro + institutions + FE ----

hsc_lm <- lm(
  TotalBedper1k ~ ln_gdp_pc_ppp + urban_population_of_total_population + EDI_SEA + BAI_SEA +
    factor(iso3c) + factor(year),
  data = disaster_data_SEA2_1991_2021, na.action = na.exclude
)
disaster_data_SEA2_1991_2021$HSC_resid <- residuals(hsc_lm)

#---- 2) Builds the lagged, panel-wide z-scored term ----

HSC_t1 <- numeric(nrow(disaster_data_SEA2_1991_2021))
HSC_t1[] <- NA_real_
split_idx <- split(seq_len(nrow(disaster_data_SEA2_1991_2021)), disaster_data_SEA2_1991_2021$iso3c)
for (idx in split_idx) {
  ord <- order(disaster_data_SEA2_1991_2021$year[idx], na.last = TRUE)
  i <- idx[ord]
  HSC_t1[i] <- c(NA, head(disaster_data_SEA2_1991_2021$HSC_resid[i], -1))
}
disaster_data_SEA2_1991_2021$HSC_t1 <- HSC_t1

z_hsci <- scale(disaster_data_SEA2_1991_2021$HSC_t1)
disaster_data_SEA2_1991_2021$HSC_t1 <- as.numeric(z_hsci)
summary(disaster_data_SEA2_1991_2021$HSC_t1)

#---3) Quick diagnostics (optional; comment out if not needed) ----
  # cat("HSC block: non-missing HSC_t1 n =", sum(!is.na(df$HSC_t1)), "\n")
  # cat("Correlations post-resid (pairwise complete obs):\n")
  # suppressWarnings(print(cor(df[,c("HSC_t1","EDI","BAI","ln_gdppc","urban_share")],
  #                             use = "pairwise.complete.obs")))

# End of HSC block

# ============================================================
# IRI block (standalone) — Information Reach Index
# Creates: IRI_resid, IRI_t1 (lagged residual)
#
# Requirements:
#   df$iso3, df$year
#   df$IRI              # your already-scaled/z-scored IRI index
#   df$ln_gdppc         # log GDP per capita
#   df$urban_share      # urbanization share / %
#   df$EDI, df$BAI      # institutional indices
# Optional:
#   df$education_years  # included in controls if present
#
# Behaviour:
#   1) Residualizes IRI on {ln_gdppc, urban_share, EDI, BAI, [education_years]}
#      + country FE + year FE  → IRI_resid
#   2) Builds IRI_t1 as 1-year within-country lag of IRI_resid.
#   3) No extra z-scoring (you already scaled IRI).
# ============================================================

## ---- INPUT CHECKS ----
required_cols <- c("iso3c","year","RII","ln_gdp_pc_ppp","urban_population_of_total_population","EDI_SEA","BAI_SEA")
missing <- setdiff(required_cols, names(disaster_data_SEA2_1991_2021))
if (length(missing) > 0) {
  stop(sprintf("IRI block: missing required columns: %s",
               paste(missing, collapse = ", ")))
}

## ---- 1) Build regression formula for residualization ----
controls <- c("ln_gdp_pc_ppp", "urban_population_of_total_population", "EDI_SEA", "BAI_SEA")
if ("education_years" %in% names(df)) {
  controls <- c(controls, "education_years")
}

rhs <- paste(controls, collapse = " + ")
rhs <- paste(rhs, "+ factor(iso3c) + factor(year)")
fml <- as.formula(paste("RII ~", rhs))

## ---- 2) Residualize IRI on macro + institutions + FE ----
rii_lm <- lm(fml, data = disaster_data_SEA2_1991_2021, na.action = na.exclude)
disaster_data_SEA2_1991_2021$RII_resid <- residuals(rii_lm)

## ---- 3) Lag residuals within country by 1 year ----
RII_t1 <- numeric(nrow(disaster_data_SEA2_1991_2021))
RII_t1[] <- NA_real_

split_idx <- split(seq_len(nrow(disaster_data_SEA2_1991_2021)), disaster_data_SEA2_1991_2021$iso3c)
for (idx in split_idx) {
  ord <- order(disaster_data_SEA2_1991_2021$year[idx], na.last = TRUE)
  i <- idx[ord]
  RII_t1[i] <- c(NA, head(disaster_data_SEA2_1991_2021$RII_resid[i], -1))
}

disaster_data_SEA2_1991_2021$RII_t1 <- RII_t1
summary(disaster_data_SEA2_1991_2021$RII_t1)

## ---- 4) Optional diagnostics (comment out if not needed) ----
# cat("IRI block: non-missing RII_t1 n =",
#     sum(!is.na(df$IRI_t1)), "\n")
# suppressWarnings(print(
#   cor(df[, c("IRI_t1","EDI","BAI","ln_gdppc","urban_share")],
#       use = "pairwise.complete.obs")
# ))

# End of RII block


# ============================================================
# STAB block (standalone) — Political Stability / Absence of Violence
# Replacement for CI using World Bank Governance Indicator (WGI).
#
# Creates: STAB_resid, STAB_t1
#
# Requirements in df:
#   iso3c, year
#   PSAV         # WGI "Political Stability and Absence of Violence/Terrorism"
#   ln_gdp_pc_ppp     # log GDP per capita
#   urban_population of total population  # urbanisation share
#   EDI_SEA, BAI_SEA     # your institutional indices
#
# Behaviour:
#   1) Residualize PSAV on {ln_gdp_pc_ppp, urban_population of total population, EDI_SEA, BAI_SEA} + FE(iso3c,year)
#      → STAB_resid.
#   2) Lag STAB_resid within country by 1 year → STAB_t1.
#   3) No extra z-scoring (PSAV already in SD-like units).
# ============================================================

required_cols <- c("iso3c","year","PSAV","ln_gdp_pc_ppp","urban_population_of_total_population","EDI_SEA","BAI_SEA")
missing <- setdiff(required_cols, names(disaster_data_SEA2_1991_2021))
if (length(missing) > 0) {
  stop(sprintf("STAB block: missing required columns: %s",
               paste(missing, collapse = ", ")))
}

# 1) Residualize PSAV on macro + institutions + FE
stab_lm <- lm(
  PSAV ~ ln_gdp_pc_ppp + urban_population_of_total_population + EDI_SEA + BAI_SEA +
    factor(iso3c) + factor(year),
  data = disaster_data_SEA2_1991_2021,
  na.action = na.exclude
)
disaster_data_SEA2_1991_2021$STAB_resid <- residuals(stab_lm)

# 2) Lag within country by 1 year
STAB_t1 <- numeric(nrow(disaster_data_SEA2_1991_2021))
STAB_t1[] <- NA_real_

split_idx <- split(seq_len(nrow(disaster_data_SEA2_1991_2021)), disaster_data_SEA2_1991_2021$iso3c)
for (idx in split_idx) {
  ord <- order(disaster_data_SEA2_1991_2021$year[idx], na.last = TRUE)
  i <- idx[ord]
  STAB_t1[i] <- c(NA, head(disaster_data_SEA2_1991_2021$STAB_resid[i], -1))
}
disaster_data_SEA2_1991_2021$STAB_t1 <- STAB_t1
z_STAB<-scale(disaster_data_SEA2_1991_2021$STAB_t1)
disaster_data_SEA2_1991_2021$STAB_t1<-as.numeric(z_STAB)
summary(disaster_data_SEA2_1991_2021$STAB_t1)

# 3) Optional diagnostics
# cat("STAB block: non-missing STAB_t1 n =",
#     sum(!is.na(df$STAB_t1)), "\n")
# suppressWarnings(print(
#   cor(df[, c("STAB_t1","EDI","BAI","ln_gdppc","urban_share")],
#       use = "pairwise.complete.obs")
# ))

# End of PSAV block

# ============================================================
# GV block (standalone) — Governance Volatility
#
# Creates:
#   dEDI, dBAI       : first differences
#   sd3_dEDI, sd3_dBAI : rolling 3-year SDs by country
#   GV_raw           : average of the two SDs
#   GV_resid         : GV_raw residualised on levels & FE
#   GV_t             : z-scored GV_resid (final RHS term)
#
# Requirements in df:
#   iso3, year
#   EDI, BAI         # institutional indices (levels)
#
# Behaviour:
#   1) Sort by iso3, year.
#   2) Build ΔEDI, ΔBAI.
#   3) For each country, compute rolling 3-year SD of ΔEDI, ΔBAI.
#   4) GV_raw = 0.5*(sd3_dEDI + sd3_dBAI).
#   5) Residualise GV_raw on {EDI, BAI} + FE(iso3, year).
#   6) GV_t = z-score of GV_resid across the panel.
# ============================================================

## ---- INPUT CHECKS ----
required_cols <- c("iso3c","year","EDI_SEA","BAI_SEA")
missing <- setdiff(required_cols, names(disaster_data_SEA2_1991_2021))
if (length(missing) > 0) {
  stop(sprintf("GV block: missing required columns: %s",
               paste(missing, collapse = ", ")))
}

## ---- 0) Ensure sorted by country and year ----
disaster_data_SEA2_1991_2021 <- disaster_data_SEA2_1991_2021[order(disaster_data_SEA2_1991_2021$iso3c, disaster_data_SEA2_1991_2021$year), ]

## ---- 1) First differences of EDI and BAI by country ----
n <- nrow(disaster_data_SEA2_1991_2021)
dEDI <- rep(NA_real_, n)
dBAI <- rep(NA_real_, n)

split_idx <- split(seq_len(n), disaster_data_SEA2_1991_2021$iso3c)
for (idx in split_idx) {
  ord <- order(disaster_data_SEA2_1991_2021$year[idx], na.last = TRUE)
  i <- idx[ord]
  dEDI[i] <- c(NA, diff(disaster_data_SEA2_1991_2021$EDI_SEA[i]))
  dBAI[i] <- c(NA, diff(disaster_data_SEA2_1991_2021$BAI_SEA[i]))
}
disaster_data_SEA2_1991_2021$dEDI_SEA <- dEDI
disaster_data_SEA2_1991_2021$dBAI_SEA <- dBAI

## ---- 2) Rolling 3-year SD of dEDI and dBAI by country ----
sd3_dEDI <- rep(NA_real_, n)
sd3_dBAI <- rep(NA_real_, n)

for (idx in split_idx) {
  ord <- order(disaster_data_SEA2_1991_2021$year[idx], na.last = TRUE)
  i <- idx[ord]
  k_max <- length(i)
  if (k_max < 3) next  # need at least 3 years
  
  for (k in 1:k_max) {
    if (k < 3) {
      sd3_dEDI[i[k]] <- NA_real_
      sd3_dBAI[i[k]] <- NA_real_
    } else {
      window_idx <- i[(k-2):k]
      v1 <- disaster_data_SEA2_1991_2021$dEDI_SEA[window_idx]
      v2 <- disaster_data_SEA2_1991_2021$dBAI_SEA[window_idx]
      
      if (any(is.na(v1))) {
        sd3_dEDI[window_idx[3]] <- NA_real_
      } else {
        sd3_dEDI[window_idx[3]] <- stats::sd(v1)
      }
      
      if (any(is.na(v2))) {
        sd3_dBAI[window_idx[3]] <- NA_real_
      } else {
        sd3_dBAI[window_idx[3]] <- stats::sd(v2)
      }
    }
  }
}

disaster_data_SEA2_1991_2021$sd3_dEDI_SEA <- sd3_dEDI
disaster_data_SEA2_1991_2021$sd3_dBAI_SEA <- sd3_dBAI

## ---- 3) GV_raw: average of the two SDs ----
disaster_data_SEA2_1991_2021$GV_raw <- 0.5 * (disaster_data_SEA2_1991_2021$sd3_dEDI_SEA
                                              + disaster_data_SEA2_1991_2021$sd3_dBAI_SEA)

## ---- 4) Residualise GV_raw on levels + FE ----
gv_lm <- lm(
  GV_raw ~ EDI_SEA + BAI_SEA + factor(iso3c) + factor(year),
  data = disaster_data_SEA2_1991_2021,
  na.action = na.exclude
)
disaster_data_SEA2_1991_2021$GV_resid <- residuals(gv_lm)

## ---- 5) z-score GV_resid across the panel ----
z <- scale(disaster_data_SEA2_1991_2021$GV_resid)
disaster_data_SEA2_1991_2021$GV_t <- as.numeric(z)
summary(disaster_data_SEA2_1991_2021)

## ---- Optional diagnostics ----
# cat("GV block: non-missing GV_t n =",
#     sum(!is.na(df$GV_t)), "\n")
# suppressWarnings(print(
#   cor(df[, c("GV_t","EDI","BAI")],
#       use = "pairwise.complete.obs")
# ))

# End of GV block


# ============================================================
# W_edge block — Neighbour weights from Natural Earth
# using geosphere::distHaversine
#
# Requires packages:
#   rnaturalearth, rnaturalearthdata, sf, dplyr, geosphere
#
# Creates:
#   W_edge: data.frame with columns iso3_from, iso3_to, weight
#
# Parameters you can tweak:
#   country_iso3 : vector of your 11 ISO3 codes
#   cutoff_km    : max distance (km) to count as a neighbour
# ============================================================

# ---- Packages ----
suppressWarnings({
  if (!requireNamespace("rnaturalearth", quietly = TRUE))
    stop("Please install.packages('rnaturalearth')")
  if (!requireNamespace("rnaturalearthdata", quietly = TRUE))
    stop("Please install.packages('rnaturalearthdata')")
  if (!requireNamespace("sf", quietly = TRUE))
    stop("Please install.packages('sf')")
  if (!requireNamespace("dplyr", quietly = TRUE))
    stop("Please install.packages('dplyr')")
  if (!requireNamespace("geosphere", quietly = TRUE))
    stop("Please install.packages('geosphere')")
})

library(rnaturalearth)
library(rnaturalearthdata)
library(sf)
library(dplyr)
library(geosphere)

# ---- 1) Parameters ----
country_iso3 <- c("BRN","KHM","IDN","LAO","MYS","MMR","PHL","SGP","THA","TLS","VNM")
cutoff_km    <- 1500  # neighbour distance threshold; adjust if you like

# ---- 2) Get country polygons from Natural Earth ----
world <- rnaturalearth::ne_countries(
  scale = "medium",      # or "small", "large"
  returnclass = "sf"
)

# Filter to your 11 countries using ISO3
sea_sf <- world %>%
  dplyr::filter(iso_a3 %in% country_iso3)

if (nrow(sea_sf) != length(country_iso3)) {
  warning("Some iso3 codes were not found in Natural Earth 'iso_a3'. Check country_iso3 vector.")
}

# ---- 3) Compute centroids (or point-on-surface) ----
sea_pts <- sf::st_point_on_surface(sea_sf$geometry)
coords_mat <- sf::st_coordinates(sea_pts)

coords <- data.frame(
  iso3 = sea_sf$iso_a3,
  lon  = coords_mat[, 1],
  lat  = coords_mat[, 2],
  stringsAsFactors = FALSE
)

# ---- 4) Build raw neighbour edges using geosphere::distHaversine ----
n <- nrow(coords)
iso3_from <- character(0)
iso3_to   <- character(0)
w_raw     <- numeric(0)

for (i in seq_len(n)) {
  for (j in seq_len(n)) {
    if (i == j) next  # no self-edges
    
    # distHaversine takes (lon, lat) in degrees and returns meters
    d_m <- geosphere::distHaversine(
      c(coords$lon[i], coords$lat[i]),
      c(coords$lon[j], coords$lat[j])
    )
    d_km <- d_m / 1000
    
    if (!is.na(d_km) && d_km <= cutoff_km) {
      iso3_from <- c(iso3_from, coords$iso3[i])
      iso3_to   <- c(iso3_to,   coords$iso3[j])
      w_raw     <- c(w_raw,     1 / (1 + d_km))  # inverse-distance
    }
  }
}

if (length(iso3_from) == 0) {
  stop("W_edge block: no neighbours found within cutoff_km; try increasing cutoff_km.")
}

W_edge <- data.frame(
  iso3_from = iso3_from,
  iso3_to   = iso3_to,
  weight    = w_raw,
  stringsAsFactors = FALSE
)

# ---- 5) Row-standardise so sum_j w_ij = 1 ----
row_sums <- tapply(W_edge$weight, W_edge$iso3_from, sum, na.rm = TRUE)
W_edge$w_sum <- row_sums[W_edge$iso3_from]

if (any(W_edge$w_sum == 0, na.rm = TRUE)) {
  warning("W_edge block: some iso3_from have weight sum = 0; they will have no effective neighbours.")
}

W_edge$weight <- W_edge$weight / W_edge$w_sum
W_edge$w_sum  <- NULL

# ---- Optional diagnostics ----
# print(W_edge)
# aggregate(weight ~ iso3_from, data = W_edge, sum)  # should be ~1 per iso3_from

# End of W_edge block


# ============================================================
# NS block (standalone) — Neighbour Spillover of Rapid-Year Shocks
#
# Creates: NS_t
#
# Concept:
#   For each country-year (i,t), NS_t = sum_j w_ij * rapid_year_jt,
#   where w_ij are neighbour weights (row-standardised for each i),
#   and rapid_year_jt is the rapid-disaster-year tag for neighbour j.
#
# Requirements in df:
#   df$iso3       : country code
#   df$year
#   df$rapid_year : 0/1 indicator that year t is a "rapid hazard" year for country i
#
# Requirements in W_edge (a separate data frame in your environment):
#   W_edge$iso3_from : origin country (i)
#   W_edge$iso3_to   : neighbour country (j)
#   W_edge$weight    : neighbour weight (need not be standardised; this block will normalise)
#
# Behaviour:
#   1) Checks inputs.
#   2) Row-standardises weights by iso3_from (sum_j w_ij = 1).
#   3) Builds NS_t by merging neighbour rapid_year_jt and taking weighted sums.
#   4) Merges NS_t back into df by (iso3, year).
# ============================================================

# ---- INPUT CHECKS ----
disaster_data_SEA2_1991_2021$rapid_year<-ifelse(disaster_data_SEA2_1991_2021$any_rapiddis=="TRUE",1,0)

required_df <- c("iso3c","year","rapid_year")
missing_df <- setdiff(required_df, names(disaster_data_SEA2_1991_2021))
if (length(missing_df) > 0) {
  stop(sprintf("NS block: df is missing required columns: %s",
               paste(missing_df, collapse = ", ")))
}

if (!exists("W_edge")) {
  stop("NS block: W_edge not found. Please create W_edge with columns iso3_from, iso3_to, weight.")
}

required_W <- c("iso3_from","iso3_to","weight")
missing_W <- setdiff(required_W, names(W_edge))
if (length(missing_W) > 0) {
  stop(sprintf("NS block: W_edge is missing required columns: %s",
               paste(missing_W, collapse = ", ")))
}

# ---- 1) Row-standardise weights by iso3_from ----
W_edge_norm <- W_edge
W_edge_norm$iso3_from <- as.character(W_edge_norm$iso3_from)
W_edge_norm$iso3_to   <- as.character(W_edge_norm$iso3_to)

row_sums <- tapply(W_edge_norm$weight, W_edge_norm$iso3_from, sum, na.rm = TRUE)
W_edge_norm$w_sum <- row_sums[W_edge_norm$iso3_from]

if (any(W_edge_norm$w_sum == 0, na.rm = TRUE)) {
  warning("NS block: some iso3_from have weight sum = 0; NS_t will be NA for those origins.")
}

W_edge_norm$weight_norm <- W_edge_norm$weight / W_edge_norm$w_sum
W_edge_norm <- W_edge_norm[, c("iso3_from","iso3_to","weight_norm")]

# ---- 2) Build NS_t ----
# Base panel: (i,t)
base_panel <- disaster_data_SEA2_1991_2021[, c("iso3c","year","rapid_year")]
base_panel$iso3c <- as.character(base_panel$iso3c)

# Attach neighbour list (i -> j)
tmp <- merge(
  base_panel,
  W_edge_norm,
  by.x = "iso3c",      # iso3 is i
  by.y = "iso3_from",
  all.x = TRUE,
  all.y = FALSE
)

# Attach neighbours' rapid_year_jt by matching on (iso3_to, year)
neigh_ry <- disaster_data_SEA2_1991_2021[, c("iso3c","year","rapid_year")]
names(neigh_ry) <- c("iso3_to","year","rapid_year_j")

tmp2 <- merge(
  tmp,
  neigh_ry,
  by = c("iso3_to","year"),
  all.x = TRUE,
  all.y = FALSE
)

if (!"rapid_year_j" %in% names(tmp2)) {
  stop("NS block: after merging, rapid_year_j is missing; check that df has rapid_year for all iso3 in W_edge.")
}

# Weighted contribution
tmp2$w_ry <- tmp2$weight_norm * tmp2$rapid_year_j

# Aggregate to (i,t): NS_t(i,t) = sum_j w_ij * rapid_year_jt
NS_df <- aggregate(
  w_ry ~ iso3c + year,
  data = tmp2,
  FUN = function(x) {
    if (all(is.na(x))) NA_real_ else sum(x, na.rm = TRUE)
  }
)
names(NS_df)[names(NS_df) == "w_ry"] <- "NS_t"

# ---- 3) Merge NS_t back into df ----
disaster_data_SEA2_1991_2021 <- merge(disaster_data_SEA2_1991_2021, NS_df, by = c("iso3c","year"), all.x = TRUE, all.y = FALSE)
disaster_data_SEA_1991_2021<-disaster_data_SEA2_1991_2021

# Optional diagnostics
# cat("NS block: non-missing NS_t n =",
#     sum(!is.na(df$NS_t)), "\n")

# End of NS block

# ============================================================
# LFS block (standalone) — Logistics Friction Shock
#
# Creates: frag_norm, z_freight, LFS_it
#
# Requirements in df:
#   iso3, year
#   frag_index    # static or slow-moving fragmentation measure (higher = more fragmented)
#   freight_index # global freight/logistics stress index (same value for all iso3 in a year)
#
# Behaviour:
#   1) Normalise frag_index to [0,1] across all countries (panel-wide).
#   2) Build a year-level z-scored freight series, optionally using t-1 lag.
#   3) LFS_it = frag_norm * z_freight_year.
# ============================================================

## ---- USER SETTING: lag the freight stress by one year? ----
use_freight_lag <- FALSE   # set FALSE if you want contemporaneous freight stress

## ---- INPUT CHECKS ----
required_cols <- c("iso3c","year","frag_index","freight_index")
missing <- setdiff(required_cols, names(disaster_data_SEA2_1991_2021))
if (length(missing) > 0) {
  stop(sprintf("LFS block: df is missing required columns: %s",
               paste(missing, collapse = ", ")))
}

# Ensure types
disaster_data_SEA2_1991_2021$iso3c <- as.character(disaster_data_SEA2_1991_2021$iso3c)

## ---- 1) Normalise frag_index to [0,1] across panel ----
frag_min <- min(disaster_data_SEA2_1991_2021$frag_index, na.rm = TRUE)
frag_max <- max(disaster_data_SEA2_1991_2021$frag_index, na.rm = TRUE)

if (!is.finite(frag_min) || !is.finite(frag_max)) {
  stop("LFS block: frag_index has no finite values.")
}

if (abs(frag_max - frag_min) < .Machine$double.eps) {
  warning("LFS block: frag_index has no cross-country variation; setting frag_norm = 0.")
  disaster_data_SEA2_1991_2021$frag_norm <- 0
} else {
  disaster_data_SEA2_1991_2021$frag_norm <- (disaster_data_SEA2_1991_2021$frag_index - frag_min) / (frag_max - frag_min)
}
## ---- 2) Build year-level z-scored freight series (optionally lagged) ----

# Step 2a: collapse freight_index to one value per year (mean across iso3s)
fi_year <- aggregate(freight_index ~ year, data = disaster_data_SEA2_1991_2021, FUN = function(x) mean(x, na.rm = TRUE))
fi_year <- fi_year[order(fi_year$year), ]

# Step 2b: optionally lag by 1 year
if (use_freight_lag) {
  fi_year$freight_used <- c(NA, head(fi_year$freight_index, -1))
} else {
  fi_year$freight_used <- fi_year$freight_index
}

# Step 2c: z-score freight_used across years
# (scale() handles NA; we coerce back to numeric)
fi_year$z_freight <- as.numeric(scale(fi_year$freight_used))

# Step 2d: merge z_freight back to df by year
disaster_data_SEA2_1991_2021 <- merge(disaster_data_SEA2_1991_2021, fi_year[, c("year","z_freight")],
            by = "year", all.x = TRUE, all.y = FALSE, sort = FALSE)

## ---- 3) Compute LFS_it = frag_norm * z_freight ----
disaster_data_SEA2_1991_2021$LFS_it <- disaster_data_SEA2_1991_2021$frag_norm * disaster_data_SEA2_1991_2021$z_freight
disaster_data_SEA_1991_2021<-disaster_data_SEA2_1991_2021

## ---- Optional diagnostics ----
# cat("LFS block: non-missing LFS_it n =",
#     sum(!is.na(df$LFS_it)), "\n")
# print(aggregate(LFS_it ~ iso3, data = df, FUN = function(x) c(min = min(x, na.rm=TRUE),
#                                                               max = max(x, na.rm=TRUE))))

# End of LFS block


# =============================================================================

# df  # columns at least:
# iso3   : country code
# year   : year
# region : region code (e.g. "SEA" or subregions if you want)
# EDI    : our democracy / electoral index
# BAI    : our bureaucratic autonomy index

#---- Global & regional EDI averages----

library(dplyr)

## GLOBAL mean
# Preparing V-DEM for the analysis (Putting back Brunei for robust results)

V_DEM_lim_selected<-V_DEM_lim[,-c(2,3)]
colnames(V_DEM_lim_selected)[2]<-"iso3c"
V_DEM_lim_selected<-bind_rows(V_DEM_lim_selected,brunei_components)
V_DEM_lim_selected<-V_DEM_lim_selected%>%filter(year>=1991&year<=2021)

edi_global <- V_DEM_lim_selected %>%
  group_by(year) %>%
  summarise(EDI_global = mean(v2x_polyarchy, na.rm = TRUE), .groups = "drop")

disaster_data_SEA2_1991_2021 <- disaster_data_SEA2_1991_2021 %>%
  left_join(edi_global, by = "year")

## REGIONAL mean EDI_{r,t} (including i)

edi_regional<-disaster_data_SEA2_1991_2021%>%group_by(year)%>%
  summarise(EDI_regional=mean(EDI_SEA),.groups = "drop")

disaster_data_SEA2_1991_2021<-disaster_data_SEA2_1991_2021%>%
  left_join(edi_regional,by="year")
summary(disaster_data_SEA2_1991_2021)
disaster_data_SEA2_1991_2021_2<-disaster_data_SEA2_1991_2021 # Safety Check

# Global & regional LOO EDI

## Helper: sum and N per year

helper_global <- V_DEM_lim_selected %>%
  group_by(year) %>%
  summarise(
    EDI_sum = sum(v2x_polyarchy, na.rm = TRUE),
    N       = n(),
    .groups = "drop"
  )

disaster_data_SEA2_1991_2021 <- disaster_data_SEA2_1991_2021 %>%
  left_join(helper_global, by = "year") %>%
  mutate(
    EDI_global_LOO = ifelse(
      N > 1,
      (EDI_sum - EDI_SEA) / (N - 1),
      NA_real_
    )
  ) %>%
  select(-EDI_sum, -N)

## Helper: sum and N per region-year
helper_reg <- disaster_data_SEA2_1991_2021 %>%
  group_by(year) %>%
  summarise(
    EDI_reg_sum = sum(EDI_SEA, na.rm = TRUE),
    N_reg       = n(),
    .groups     = "drop"
  )

disaster_data_SEA2_1991_2021 <- disaster_data_SEA2_1991_2021 %>%
  left_join(helper_reg, by = c("year")) %>%
  mutate(
    EDI_region_LOO = ifelse(
      N_reg > 1,
      (EDI_reg_sum - EDI_SEA) / (N_reg - 1),
      NA_real_
    )
  ) %>%
  select(-EDI_reg_sum, -N_reg)


# Bartik (shift–share) democratization instruments

## baseline EDI per country = first-year EDI

base_edi <- V_DEM_lim_selected %>%
  arrange(iso3c, year) %>%
  group_by(iso3c) %>%
  slice(1) %>%         # first year for that iso3c
  ungroup() %>%
  select(iso3c, EDI_base = v2x_polyarchy)

disaster_data_SEA2_1991_2021 <- disaster_data_SEA2_1991_2021 %>%
  left_join(base_edi, by = "iso3c")

## Global shock relative to first year in the sample
first_year <- min(disaster_data_SEA2_1991_2021$year, na.rm = TRUE)

edi_global_t0 <- edi_global %>%
  filter(year == first_year) %>%
  pull(EDI_global)

disaster_data_SEA2_1991_2021 <- disaster_data_SEA2_1991_2021 %>%
  mutate(
    global_shock = EDI_global - edi_global_t0,
    Z_EDI_global_Bartik = EDI_base * global_shock
  )
summary(disaster_data_SEA2_1991_2021$Z_EDI_global_Bartik)

# Global & regional BAI averages

library(tidyverse)

## GLOBAL mean BAI_t

V_DEM_lim_selected<-V_DEM_lim_selected%>%mutate(
  Z_strictSEA=Z_score2(v2stcritrecadm),
  Z_stenSEA=Z_score2(v2strenadm),
  Z_clrspSEA=Z_score2(v2clrspct),
  BAI=(Z_strictSEA+Z_stenSEA+Z_clrspSEA)/3
)
bai_global <- V_DEM_lim_selected %>%
  group_by(year) %>%
  summarise(BAI_global = mean(BAI, na.rm = TRUE), .groups = "drop")

disaster_data_SEA2_1991_2021 <- disaster_data_SEA2_1991_2021 %>%
  left_join(bai_global, by = "year")

## REGIONAL mean BAI_{r,t} (including i)

bai_region <- disaster_data_SEA2_1991_2021 %>%
  group_by(year) %>%
  summarise(BAI_region = mean(BAI_SEA, na.rm = TRUE), .groups = "drop")

disaster_data_SEA2_1991_2021 <- disaster_data_SEA2_1991_2021 %>%
  left_join(bai_region, by = c("year"))

## Helper: sum and N per year
helper_global_bai <- V_DEM_lim_selected %>%
  group_by(year) %>%
  summarise(
    BAI_sum = sum(BAI, na.rm = TRUE),
    N_bai   = n(),
    .groups = "drop"
  )

disaster_data_SEA2_1991_2021 <- disaster_data_SEA2_1991_2021 %>%
  left_join(helper_global_bai, by = "year") %>%
  mutate(
    BAI_global_LOO = ifelse(
      N_bai > 1,
      (BAI_sum - BAI_SEA) / (N_bai - 1),
      NA_real_
    )
  ) %>%
  select(-BAI_sum, -N_bai)

## Helper: sum and N per region-year
helper_reg_bai <- disaster_data_SEA2_1991_2021 %>%
  group_by(year) %>%
  summarise(
    BAI_reg_sum = sum(BAI_SEA, na.rm = TRUE),
    N_reg_bai   = n(),
    .groups     = "drop"
  )

disaster_data_SEA2_1991_2021 <- disaster_data_SEA2_1991_2021 %>%
  left_join(helper_reg_bai, by = c("year")) %>%
  mutate(
    BAI_region_LOO = ifelse(
      N_reg_bai > 1,
      (BAI_reg_sum - BAI_SEA) / (N_reg_bai - 1),
      NA_real_
    )
  ) %>%
  select(-BAI_reg_sum, -N_reg_bai)

summary(disaster_data_SEA2_1991_2021)

## baseline BAI per country = first-year BAI
bai_base <- V_DEM_lim_selected %>%
  arrange(iso3c, year) %>%
  group_by(iso3c) %>%
  slice(1) %>%
  ungroup() %>%
  select(iso3c, BAI_base = BAI)

disaster_data_SEA2_1991_2021 <- disaster_data_SEA2_1991_2021 %>%
  left_join(bai_base, by = "iso3c")
## Global baseline year
first_year <- min(disaster_data_SEA2_1991_2021$year, na.rm = TRUE)

bai_global_t0 <- bai_global %>%
  filter(year == first_year) %>%
  pull(BAI_global)

disaster_data_SEA2_1991_2021 <- disaster_data_SEA2_1991_2021 %>%
  mutate(
    global_shock_BAI = BAI_global - bai_global_t0,
    Z_BAI_global_Bartik = BAI_base * global_shock_BAI
  )

summary(disaster_data_SEA2_1991_2021)

disaster_data_SEA_1991_2021<-disaster_data_SEA2_1991_2021#Safety Check

# Write into final SEA-full file
library(xlsx)
library(tidyverse)
write_csv(disaster_data_SEA2_1991_2021,"Main Data Set for the Project.csv")
write.xlsx(disaster_data_SEA2_1991_2021,"Main Data Set for the Project.xlsx")

# End of Data wrangling and wrapping (Hopefully! :)

# It was not the end :( ! So let's add new variables :D

OECD_CRS_SEA<-OECD_DCD_FSD_DSD_CRS_DF_CRS_1_5_DAC_KHM_IDN_LAO_MYS_PHL_THA_TLS_VNM_MMR_43060_700_740_74020_100_T_T_D_Q_T_
head(OECD_CRS_SEA)
summary(OECD_CRS_SEA)

library(dplyr)
library(tidyr)
library(janitor)
library(lubridate)
library(stringr)
library(tidyr)

# OECD CRS

wide_sector_OECD <-OECD_CRS_SEA %>%
  select(RECIPIENT, TIME_PERIOD, Sector, OBS_VALUE) %>%
  pivot_wider(
    id_cols     = c(RECIPIENT, TIME_PERIOD),
    names_from  = Sector,
    values_from = OBS_VALUE
  ) %>%
  clean_names() %>%
  arrange(recipient, time_period)
names(wide_sector_OECD)
colnames(wide_sector_OECD)[1]<-"iso3c"
colnames(wide_sector_OECD)[2]<-"year"

disaster_trial<-disaster_data_SEA2_1991_2021
disaster_trial<-disaster_trial%>%full_join(wide_sector_OECD,select(iso3c,year,disaster_prevention_and_preparedness,
                                                                   multi_hazard_response_preparedness,disaster_risk_reduction,
                                                                   humanitarian_aid),by=c("iso3c","year"))
# Using ODA from World Bank

library(WDI)
library(countrycode)

ODA_SEA<-WDI(country = sea_iso3AB,indicator = "DT.ODA.ODAT.CD",start = 1991,end = 2021)
names(ODA_SEA)
colnames(ODA_SEA)[5]<-"NETODA"
ODA_SEA<-subset(ODA_SEA,select = c("iso3c","year","NETODA"))

disaster_trial<-disaster_trial%>%full_join(ODA_SEA,select(is03c,year,NETODA),by=c("iso3c","year"))

# Political Data set for protests demonstrations and so on : ACLED

acled_SEA<-Asia_Pacific_aggregated_data_up_to_2026_02_07%>%filter(COUNTRY%in%SEA_Full2)
event_list<-c("Protests","Riots","iolence against civilians")
acled_SEA<-acled_SEA%>%filter(EVENT_TYPE%in%event_list)

acled_SEA <- acled_SEA %>%
  mutate(
    event_date = ymd(WEEK),
    year = year(event_date)
  )
table(acled_SEA$SUB_EVENT_TYPE)
acled_country_year <- acled_SEA %>%
  group_by(COUNTRY, year) %>%
  summarise(
    acled_protests     = sum(EVENT_TYPE == "Protests", na.rm = TRUE),
    acled_riots        = sum(EVENT_TYPE == "Riots", na.rm = TRUE),
    acled_vac          = sum(EVENT_TYPE == "Violence against civilians", na.rm = TRUE),
    acled_exfap        =sum(SUB_EVENT_TYPE=="Excessive force against protesters",na.rm = TRUE),
    acled_mob           =sum(SUB_EVENT_TYPE=="Mob violence",na.rm = TRUE),
    acled_peaceful     =sum(SUB_EVENT_TYPE=="Peaceful protest",na.rm = TRUE),
    acled_pwi          =sum(SUB_EVENT_TYPE=="Protest with intervention",na.rm = TRUE),
    acled_violentdem   =sum(SUB_EVENT_TYPE=="Violent demonstration",na.rm = TRUE),
    .groups = "drop"
  )

names(acled_country_year)
acled_country_year$iso3c<-countrycode(acled_country_year$COUNTRY,origin = "country.name",destination = "iso3c",
                                      custom_match = c("Indonesia" = "IDN"))

disaster_trial<-disaster_trial%>%full_join(acled_country_year,select(year,iso3c,acled_protests,
                                                                     acled_riots,acled_vac,acled_exfap,
                                                                     acled_mob,acled_peaceful,acled_pwi,
                                                                     acled_violentdem),by=c("iso3c","year"))

# Checking data set with new variables

cols_to_chec<-c("disaster_prevention_and_preparedness",
                "multi_hazard_response_preparedness","disaster_risk_reduction",
                "humanitarian_aid","NETODA","acled_protests","acled_riots","acled_vac","acled_exfap","acled_mob",
                "acled_peaceful","acled_pwi","acled_violentdem")

years_complete_sel <- disaster_trial %>%
  group_by(year) %>%
  summarise(all_complete = all(complete.cases(across(all_of(cols_to_chec)))), .groups = "drop") %>%
  filter(all_complete) %>%
  pull(year)

years_complete_sel
skimr::skim(disaster_trial)
View(disaster_trial)
summary(disaster_trial$year)
disaster_trial<-disaster_trial%>%filter(year>=1991&year<=2021) #limiting data to 1991-2021

# Saving the modified data set (fingers crossed for the last modification :D)

library(xlsx)
library(tidyverse)

write_csv(disaster_trial,"Main Data (modified) Set for the Project.csv") # CSV document
write.xlsx(disaster_trial,"Main Data (modified) Set for the Project.xlsx") # Excel document
















































































































































































































